In [1]:
import pandas as pd
import plotly.express as px
from scrapers import *
from viz import *
In [2]:
summ = summ_build(rescrape=False)
summ_sorted = summ.sort_values(by='Weight', ascending=False, ignore_index=True)
summ_sorted
Out[2]:
Ticker Security Industry Weight
0 AAPL Apple Inc. Information Technology 0.067667
1 MSFT Microsoft Corp. Information Technology 0.054304
2 AMZN Amazon.com Inc. Consumer Discretionary 0.045064
3 FB Facebook, Inc. Communication Services 0.021584
4 GOOGL Alphabet Inc. (Class A) Communication Services 0.017104
... ... ... ... ...
484 FTI TechnipFMC Energy 0.000133
485 HFC HollyFrontier Corp Energy 0.000124
486 UAA Under Armour (Class A) Consumer Discretionary 0.000103
487 UA Under Armour (Class C) Consumer Discretionary 0.000096
488 NWS News Corp. Class B Communication Services 0.000071

489 rows × 4 columns

In [3]:
panel = panel_build(rescrape=False)
panel
Out[3]:
Date MMM ABT ABBV ABMD ACN ATVI ADBE AMD AAP ... WYNN XEL XRX XLNX XYL YUM ZBRA ZBH ZION ZTS
0 2020-01-02 172.119888 85.256981 83.871666 168.809998 206.125336 58.266792 334.429993 49.099998 158.191467 ... 142.405029 60.782249 34.986931 100.115349 78.584686 99.762993 259.140015 148.020065 50.195282 133.169510
1 2020-01-03 170.637741 84.217628 83.075554 166.820007 205.782028 58.286655 331.809998 48.599998 158.201385 ... 140.292755 61.074562 34.532185 97.810677 79.047241 99.450531 256.049988 147.633148 49.486965 133.189362
2 2020-01-06 170.800308 84.658859 83.731171 179.039993 204.438293 59.349670 333.709992 48.389999 155.601410 ... 140.015091 60.986866 34.011127 95.771927 78.535492 99.391953 258.010010 146.779953 48.883930 132.166824
3 2020-01-07 170.111816 84.188210 83.253510 180.350006 200.024429 59.945747 333.390015 48.250000 153.755646 ... 140.679504 60.860199 34.058491 97.958405 78.240242 99.567711 256.470001 146.650970 48.587196 132.613556
4 2020-01-08 172.722321 84.531395 83.843567 178.690002 200.416824 59.488754 337.869995 47.830002 151.989243 ... 141.562103 60.801735 34.039543 97.682640 78.505951 99.743477 247.639999 148.406982 49.065796 132.325638
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
248 2020-12-24 173.093628 107.915245 102.066246 303.410004 256.423737 90.959999 499.859985 91.809998 160.679993 ... 114.389999 64.629997 22.333601 141.990005 99.966408 107.027023 376.890015 149.250000 43.052757 160.471298
249 2020-12-28 173.282074 107.357498 102.254044 312.910004 258.795624 91.430000 498.950012 91.599998 158.800003 ... 115.470001 65.320000 23.025963 141.520004 100.086075 109.256340 382.589996 148.889999 43.072624 162.138718
250 2020-12-29 172.429108 107.895332 103.489593 320.929993 256.882202 91.370003 502.109985 90.620003 156.690002 ... 114.309998 65.419998 22.660000 139.899994 99.228508 108.927910 378.040008 152.449997 42.486534 162.867584
251 2020-12-30 172.686966 108.004890 104.053001 323.920013 256.662933 91.580002 497.450012 92.290001 157.660004 ... 113.400002 65.650002 22.889999 142.100006 100.494911 108.987625 383.750000 152.220001 42.933552 164.015808
252 2020-12-31 173.361404 109.050674 105.911278 324.200012 260.320435 92.849998 500.119995 91.709999 157.509995 ... 112.830002 66.669998 23.190001 141.770004 101.502052 108.042152 384.329987 154.089996 43.152092 165.243912

253 rows × 490 columns

In [4]:
cum_growth = pd.concat([panel['Date'], panel.iloc[:, 1:].divide(panel.iloc[0, 1:]).subtract(1)], axis=1)
cum_growth
Out[4]:
Date MMM ABT ABBV ABMD ACN ATVI ADBE AMD AAP ... WYNN XEL XRX XLNX XYL YUM ZBRA ZBH ZION ZTS
0 2020-01-02 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 2020-01-03 -0.008611 -0.012191 -0.009492 -0.011788 -0.001666 0.000341 -0.007834 -0.010183 0.000063 ... -0.014833 0.004809 -0.012998 -0.02302 0.005886 -0.003132 -0.011924 -0.002614 -0.014111 0.000149
2 2020-01-06 -0.007667 -0.007016 -0.001675 0.060601 -0.008185 0.018585 -0.002153 -0.01446 -0.016373 ... -0.016783 0.003366 -0.027891 -0.043384 -0.000626 -0.003719 -0.004361 -0.008378 -0.026125 -0.007529
3 2020-01-07 -0.011667 -0.012536 -0.00737 0.068361 -0.029598 0.028815 -0.00311 -0.017312 -0.028041 ... -0.012117 0.001282 -0.026537 -0.021545 -0.004383 -0.001957 -0.010303 -0.009249 -0.032037 -0.004175
4 2020-01-08 0.0035 -0.008511 -0.000335 0.058527 -0.027694 0.020972 0.010286 -0.025866 -0.039207 ... -0.005919 0.000321 -0.027078 -0.024299 -0.001002 -0.000196 -0.044378 0.002614 -0.022502 -0.006337
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
248 2020-12-24 0.005657 0.265764 0.216934 0.797346 0.244019 0.561095 0.494663 0.869857 0.015731 ... -0.196728 0.063304 -0.361659 0.418264 0.272085 0.072813 0.454388 0.008309 -0.142295 0.205015
249 2020-12-28 0.006752 0.259222 0.219173 0.853622 0.255526 0.569161 0.491942 0.86558 0.003847 ... -0.189144 0.074656 -0.34187 0.41357 0.273608 0.095159 0.476383 0.005877 -0.141899 0.217536
250 2020-12-29 0.001797 0.265531 0.233904 0.901131 0.246243 0.568132 0.50139 0.845621 -0.009491 ... -0.19729 0.076301 -0.35233 0.397388 0.262695 0.091867 0.458825 0.029928 -0.153575 0.22301
251 2020-12-30 0.003295 0.266816 0.240622 0.918844 0.245179 0.571736 0.487456 0.879633 -0.00336 ... -0.20368 0.080085 -0.345756 0.419363 0.27881 0.092465 0.48086 0.028374 -0.14467 0.231632
252 2020-12-31 0.007213 0.279082 0.262778 0.920502 0.262923 0.593532 0.49544 0.867821 -0.004308 ... -0.207682 0.096866 -0.337181 0.416067 0.291626 0.082988 0.483098 0.041007 -0.140316 0.240854

253 rows × 490 columns

In [5]:
cum_growth_long = pd.merge(cum_growth.melt(id_vars='Date', var_name='Ticker', value_name='Cum_Growth'),
                    summ, on='Ticker', how='outer')
cum_growth_long.to_csv('data/cum_growth_long.csv')
cum_growth_long
Out[5]:
Date Ticker Cum_Growth Security Industry Weight
0 2020-01-02 MMM 0.0 3M Company Industrials 0.003330
1 2020-01-03 MMM -0.008611 3M Company Industrials 0.003330
2 2020-01-06 MMM -0.007667 3M Company Industrials 0.003330
3 2020-01-07 MMM -0.011667 3M Company Industrials 0.003330
4 2020-01-08 MMM 0.0035 3M Company Industrials 0.003330
... ... ... ... ... ... ...
123712 2020-12-24 ZTS 0.205015 Zoetis Health Care 0.002529
123713 2020-12-28 ZTS 0.217536 Zoetis Health Care 0.002529
123714 2020-12-29 ZTS 0.22301 Zoetis Health Care 0.002529
123715 2020-12-30 ZTS 0.231632 Zoetis Health Care 0.002529
123716 2020-12-31 ZTS 0.240854 Zoetis Health Care 0.002529

123717 rows × 6 columns

In [6]:
growth_ts('all', summ_sorted, cum_growth_long).show()
growth_ts(100, summ_sorted, cum_growth_long).show()
growth_ts(25, summ_sorted, cum_growth_long).show()
In [7]:
growth_year = cum_growth_long.loc[cum_growth_long.Date=='2020-12-31',].rename(columns={'Cum_Growth': 'Growth'})
growth_year
Out[7]:
Date Ticker Growth Security Industry Weight
252 2020-12-31 MMM 0.007213 3M Company Industrials 0.003330
505 2020-12-31 ABT 0.279082 Abbott Laboratories Health Care 0.006305
758 2020-12-31 ABBV 0.262778 AbbVie Inc. Health Care 0.006060
1011 2020-12-31 ABMD 0.920502 ABIOMED Inc Health Care 0.000414
1264 2020-12-31 ACN 0.262923 Accenture plc Information Technology 0.005509
... ... ... ... ... ... ...
122704 2020-12-31 YUM 0.082988 Yum! Brands Inc Consumer Discretionary 0.001074
122957 2020-12-31 ZBRA 0.483098 Zebra Technologies Information Technology 0.000655
123210 2020-12-31 ZBH 0.041007 Zimmer Biomet Health Care 0.001019
123463 2020-12-31 ZION -0.140316 Zions Bancorp Financials 0.000225
123716 2020-12-31 ZTS 0.240854 Zoetis Health Care 0.002529

489 rows × 6 columns

In [8]:
growth_by_weight(growth_year)
In [9]:
def agg_functions(group):
    tot_weight = group.Weight.sum()
    avg_growth = sum(group.Growth * group.Weight) / tot_weight
    
    return pd.Series({'Growth': avg_growth, 'Weight': tot_weight})

industry_summ = growth_year.groupby('Industry').apply(agg_functions).reset_index()
industry_summ
Out[9]:
Industry Growth Weight
0 Communication Services 0.262197 0.110983
1 Consumer Discretionary 0.401345 0.112045
2 Consumer Staples 0.135686 0.067282
3 Energy -0.325362 0.024600
4 Financials 0.015153 0.105272
5 Health Care 0.162269 0.135533
6 Industrials 0.143372 0.083820
7 Information Technology 0.492218 0.280257
8 Materials 0.254763 0.026901
9 Real Estate 0.030406 0.024954
10 Utilities 0.048888 0.028353
In [10]:
growth_by_weight(industry_summ, industry=True)
In [11]:
cum_weight = pd.DataFrame({'n': summ_sorted.index+1, 'cum_weight': summ_sorted.Weight.cumsum()})
px.line(cum_weight, x='n', y='cum_weight', 
        labels={'n': '<b># Stocks</b>', 'cum_weight': '<b>Cumulative Weight</b>'},
        title='<b>Distribution of weight among S&P 500 Index stocks (2020)',
        width=1000, height=600)